suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(MASS))
suppressPackageStartupMessages(library(xtable))
suppressPackageStartupMessages(library(lattice))
suppressPackageStartupMessages(library(latticeExtra))
suppressPackageStartupMessages(library(car))
suppressPackageStartupMessages(library(beepr))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(lmerTest))
suppressPackageStartupMessages(library(LMERConvenienceFunctions)) # for perSubjectTrim
suppressPackageStartupMessages(library(languageR))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(grid))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(rms))                      # for varclus
suppressPackageStartupMessages(library(languageR))
source('CIfunctions.R')
cat(paste('number of subjects = ',30,'\n',
          '---','\n',
          'number of trials per subject = ',256,'\n', 
          '---','\n',          
          'number of unique cells in the design = Item(4) * Number(2) * Vagueness(2) * Order(2) * Quantity(2) = ',4*2*2*2*2,'\n',
          'number of times a subject saw a unique cell in the design = 256 / 64 = ', 256/64,'\n',
          '---','\n',
          'number of higher-level cells in the design (abstracting over Order and Quantity) = Item(4) * Number(2) * Vagueness(2) = ',4*2*2,'\n',
          'number of times a subject saw a higher level cell in the design =  = 256 / 16 = ', 4*2*2, '\n',
          sep=''))
## number of subjects = 30
## ---
## number of trials per subject = 256
## ---
## number of unique cells in the design = Item(4) * Number(2) * Vagueness(2) * Order(2) * Quantity(2) = 64
## number of times a subject saw a unique cell in the design = 256 / 64 = 4
## ---
## number of higher-level cells in the design (abstracting over Order and Quantity) = Item(4) * Number(2) * Vagueness(2) = 16
## number of times a subject saw a higher level cell in the design =  = 256 / 16 = 16

Distributional stuff, including transformations.

x=445
lambda = c(-2,-1,-0.5, 0,0.5,1,2)

xd=data.frame(
  lambda = c(
    lambda[1],
    lambda[2],
    lambda[3],
    lambda[4],
    lambda[5],
    lambda[6],
    lambda[7]
  ),
  x=c(    
    paste("x = ", sep="", x),
    paste("x = ", sep="", x),
    paste("x = ", sep="", x),
    paste("x = ", sep="", x),
    paste("x = ", sep="", x),
    paste("x = ", sep="", x),
    paste("x = ", sep="", x)
     ),
  conventional=c(
    '$$y=\\frac{1}{x^2}$$', 
    '$$y=\\frac{1}{x}$$', 
    '$$y=\\frac{1}{\\sqrt{x}}$$', 
    '$$y=log_e(x)$$', 
    '$$y=\\sqrt{x}$$', 
    '$$y=x$$', 
    '$$y=x^2$$'
    ),
  power=c(
    '$$y=x^{-2}$$', 
    '$$y=x^{-1}$$', 
    '$$y=x^{-0.5}$$', 
    '$$y=log_e(x)$$', 
    '$$y=x^{0.5}$$', 
    '$$y=x^{1}$$', 
    '$$y=x^{2}$$'
    ),
  y=c(     
    paste("y = ", sep="", format(round(x^lambda[1],6), sc=F) ),
    paste("y = ", sep="", round(x^lambda[2],3) ),
    paste("y = ", sep="", round(x^lambda[3],3) ),
    paste("y = ", sep="", round(log(x),3) ),
    paste("y = ", sep="", round(x^lambda[5],2) ),
    paste("y = ", sep="", round(x^lambda[6],6) ),
    paste("y = ", sep="", round(x^lambda[7],6) )
  ),
  back = c(
    '$$x=y^{\\frac{1}{-2}}$$',
    '$$x=y^{\\frac{1}{-1}}$$', 
    '$$x=y^{\\frac{1}{-0.5}}$$', 
    '$$x=exp(y)$$',  
    '$$x=y^{\\frac{1}{0.5}}$$', 
    '$$x=y^{\\frac{1}{1}}$$',  
    '$$x=y^{\\frac{1}{2}}$$'
  ),
  back_lambda=c(
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=exp(y)$$',  
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$'
  ),
  y_back=c(  
    paste( 'x = ', (x^lambda[1]) ^ {1/lambda[1]} ,sep=""),
    paste( 'x = ', (x^lambda[2]) ^ {1/lambda[2]} ,sep=""),
    paste( 'x = ', (x^lambda[3]) ^ {1/lambda[3]} ,sep=""),
    paste( 'x = ', exp(log(x)), sep=""),
    paste( 'x = ', (x^lambda[5]) ^ {1/lambda[5]} ,sep=""),
    paste( 'x = ', (x^lambda[6]) ^ {1/lambda[6]} ,sep=""),
    paste( 'x = ', (x^lambda[7]) ^ {1/lambda[7]} ,sep="")
  )
) 

print(xtable(xd, digits=1, align=c(rep('l', times=ncol(xd)+1))), type='html', include.rownames=F, digits=1)
lambda x conventional power y back back_lambda y_back
-2.0 x = 445 \[y=\frac{1}{x^2}\] \[y=x^{-2}\] y = 0.000005 \[x=y^{\frac{1}{-2}}\] \[x=y^{1/\lambda}\] x = 445
-1.0 x = 445 \[y=\frac{1}{x}\] \[y=x^{-1}\] y = 0.002 \[x=y^{\frac{1}{-1}}\] \[x=y^{1/\lambda}\] x = 445
-0.5 x = 445 \[y=\frac{1}{\sqrt{x}}\] \[y=x^{-0.5}\] y = 0.047 \[x=y^{\frac{1}{-0.5}}\] \[x=y^{1/\lambda}\] x = 445
0.0 x = 445 \[y=log_e(x)\] \[y=log_e(x)\] y = 6.098 \[x=exp(y)\] \[x=exp(y)\] x = 445
0.5 x = 445 \[y=\sqrt{x}\] \[y=x^{0.5}\] y = 21.1 \[x=y^{\frac{1}{0.5}}\] \[x=y^{1/\lambda}\] x = 445
1.0 x = 445 \[y=x\] \[y=x^{1}\] y = 445 \[x=y^{\frac{1}{1}}\] \[x=y^{1/\lambda}\] x = 445
2.0 x = 445 \[y=x^2\] \[y=x^{2}\] y = 198025 \[x=y^{\frac{1}{2}}\] \[x=y^{1/\lambda}\] x = 445

Load the file dat.Rda if it exists, else Get the data if file dat.Rda does not already exist.

# if the output file dat.Rda already exists then load it, else do data wrangling
if( file.exists('dat.Rda') && file.exists('dd.Rda') ) {
    load('dat.Rda'); load('dd.Rda')
    } else {
    gatherData = function(number_of_valid_subjects) {
        data_dir <- '../experimentCode/output/'
        column.headers.df <-  head( read.table(
            paste(data_dir,'subject1.data',sep=''),
            header=TRUE),0)
        gathered.data <- column.headers.df
        for (subject in 1:number_of_valid_subjects) {
            current.filename <- paste(data_dir, 'subject', subject, '.data', sep='')
            current.data <- read.table(file=current.filename, header=TRUE, stringsAsFactors = FALSE)
            gathered.data <- rbind(gathered.data, current.data)
        }
        return(gathered.data)
    } # end of gatherData function
    classifyResponse = function(dat) {
        # what were they expected to respond?
        dat$crossed = as.factor(paste('Con', dat$Condition, ':Quan', dat$Quantity, ':Item', dat$Item, sep=''))
        dat[dat$crossed=='Con1:Quan1:Item1', 'Exp_Num'] <-  6
        dat[dat$crossed=='Con1:Quan1:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con1:Quan1:Item1', 'Extr_Num']      <- 24
        dat[dat$crossed=='Con1:Quan1:Item2', 'Exp_Num'] <- 16
        dat[dat$crossed=='Con1:Quan1:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con1:Quan1:Item2', 'Extr_Num']      <- 34
        dat[dat$crossed=='Con1:Quan1:Item3', 'Exp_Num'] <- 26
        dat[dat$crossed=='Con1:Quan1:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con1:Quan1:Item3', 'Extr_Num']      <- 44
        dat[dat$crossed=='Con1:Quan1:Item4', 'Exp_Num'] <- 36
        dat[dat$crossed=='Con1:Quan1:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con1:Quan1:Item4', 'Extr_Num']      <- 54
        dat[dat$crossed=='Con1:Quan2:Item1', 'Exp_Num'] <- 24
        dat[dat$crossed=='Con1:Quan2:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con1:Quan2:Item1', 'Extr_Num']      <-  6
        dat[dat$crossed=='Con1:Quan2:Item2', 'Exp_Num'] <- 34
        dat[dat$crossed=='Con1:Quan2:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con1:Quan2:Item2', 'Extr_Num']      <- 16
        dat[dat$crossed=='Con1:Quan2:Item3', 'Exp_Num'] <- 44
        dat[dat$crossed=='Con1:Quan2:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con1:Quan2:Item3', 'Extr_Num']      <- 26
        dat[dat$crossed=='Con1:Quan2:Item4', 'Exp_Num'] <- 54
        dat[dat$crossed=='Con1:Quan2:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con1:Quan2:Item4', 'Extr_Num']      <- 36
        dat[dat$crossed=='Con2:Quan1:Item1', 'Exp_Num'] <-  6
        dat[dat$crossed=='Con2:Quan1:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con2:Quan1:Item1', 'Extr_Num']      <- 24
        dat[dat$crossed=='Con2:Quan1:Item2', 'Exp_Num'] <- 16
        dat[dat$crossed=='Con2:Quan1:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con2:Quan1:Item2', 'Extr_Num']      <- 34
        dat[dat$crossed=='Con2:Quan1:Item3', 'Exp_Num'] <- 26
        dat[dat$crossed=='Con2:Quan1:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con2:Quan1:Item3', 'Extr_Num']      <- 44
        dat[dat$crossed=='Con2:Quan1:Item4', 'Exp_Num'] <- 36
        dat[dat$crossed=='Con2:Quan1:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con2:Quan1:Item4', 'Extr_Num']      <- 54
        dat[dat$crossed=='Con2:Quan2:Item1', 'Exp_Num'] <- 24
        dat[dat$crossed=='Con2:Quan2:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con2:Quan2:Item1', 'Extr_Num']      <-  6
        dat[dat$crossed=='Con2:Quan2:Item2', 'Exp_Num'] <- 34
        dat[dat$crossed=='Con2:Quan2:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con2:Quan2:Item2', 'Extr_Num']      <- 16
        dat[dat$crossed=='Con2:Quan2:Item3', 'Exp_Num'] <- 44
        dat[dat$crossed=='Con2:Quan2:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con2:Quan2:Item3', 'Extr_Num']      <- 26
        dat[dat$crossed=='Con2:Quan2:Item4', 'Exp_Num'] <- 54
        dat[dat$crossed=='Con2:Quan2:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con2:Quan2:Item4', 'Extr_Num']      <- 36
        dat[dat$crossed=='Con3:Quan1:Item1', 'Exp_Num'] <-  6
        dat[dat$crossed=='Con3:Quan1:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con3:Quan1:Item1', 'Extr_Num']      <- 24
        dat[dat$crossed=='Con3:Quan1:Item2', 'Exp_Num'] <- 16
        dat[dat$crossed=='Con3:Quan1:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con3:Quan1:Item2', 'Extr_Num']      <- 34
        dat[dat$crossed=='Con3:Quan1:Item3', 'Exp_Num'] <- 26
        dat[dat$crossed=='Con3:Quan1:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con3:Quan1:Item3', 'Extr_Num']      <- 44
        dat[dat$crossed=='Con3:Quan1:Item4', 'Exp_Num'] <- 36
        dat[dat$crossed=='Con3:Quan1:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con3:Quan1:Item4', 'Extr_Num']      <- 54
        dat[dat$crossed=='Con3:Quan2:Item1', 'Exp_Num'] <- 24
        dat[dat$crossed=='Con3:Quan2:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con3:Quan2:Item1', 'Extr_Num']      <-  6
        dat[dat$crossed=='Con3:Quan2:Item2', 'Exp_Num'] <- 34
        dat[dat$crossed=='Con3:Quan2:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con3:Quan2:Item2', 'Extr_Num']      <- 16
        dat[dat$crossed=='Con3:Quan2:Item3', 'Exp_Num'] <- 44
        dat[dat$crossed=='Con3:Quan2:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con3:Quan2:Item3', 'Extr_Num']      <- 26
        dat[dat$crossed=='Con3:Quan2:Item4', 'Exp_Num'] <- 54
        dat[dat$crossed=='Con3:Quan2:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con3:Quan2:Item4', 'Extr_Num']      <- 36
        dat[dat$crossed=='Con4:Quan1:Item1', 'Exp_Num'] <-  6
        dat[dat$crossed=='Con4:Quan1:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con4:Quan1:Item1', 'Extr_Num']      <- 24
        dat[dat$crossed=='Con4:Quan1:Item2', 'Exp_Num'] <- 16
        dat[dat$crossed=='Con4:Quan1:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con4:Quan1:Item2', 'Extr_Num']      <- 34
        dat[dat$crossed=='Con4:Quan1:Item3', 'Exp_Num'] <- 26
        dat[dat$crossed=='Con4:Quan1:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con4:Quan1:Item3', 'Extr_Num']      <- 44
        dat[dat$crossed=='Con4:Quan1:Item4', 'Exp_Num'] <- 36
        dat[dat$crossed=='Con4:Quan1:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con4:Quan1:Item4', 'Extr_Num']      <- 54
        dat[dat$crossed=='Con4:Quan2:Item1', 'Exp_Num'] <- 24
        dat[dat$crossed=='Con4:Quan2:Item1', 'Bline_Num']     <- 15
        dat[dat$crossed=='Con4:Quan2:Item1', 'Extr_Num']      <-  6
        dat[dat$crossed=='Con4:Quan2:Item2', 'Exp_Num'] <- 34
        dat[dat$crossed=='Con4:Quan2:Item2', 'Bline_Num']     <- 25
        dat[dat$crossed=='Con4:Quan2:Item2', 'Extr_Num']      <- 16
        dat[dat$crossed=='Con4:Quan2:Item3', 'Exp_Num'] <- 44
        dat[dat$crossed=='Con4:Quan2:Item3', 'Bline_Num']     <- 35
        dat[dat$crossed=='Con4:Quan2:Item3', 'Extr_Num']      <- 26
        dat[dat$crossed=='Con4:Quan2:Item4', 'Exp_Num'] <- 54
        dat[dat$crossed=='Con4:Quan2:Item4', 'Bline_Num']     <- 45
        dat[dat$crossed=='Con4:Quan2:Item4', 'Extr_Num']      <- 36
        dat$crossed <- NULL
        # what side LEFT, MIDDLE, RIGHT corresponds with Expected, Borderline, Extreme?
        for (row in 1:nrow(dat)) {
            if (dat[row, 'Exp_Num']   == dat[row, 'Left'])  {dat[row, 'Exp_side']   <- 'left'}
            if (dat[row, 'Exp_Num']   == dat[row, 'Mid'])   {dat[row, 'Exp_side']   <- 'mid'}
            if (dat[row, 'Exp_Num']   == dat[row, 'Right']) {dat[row, 'Exp_side']   <- 'right'}
            if (dat[row, 'Bline_Num'] == dat[row, 'Left'])  {dat[row, 'Bline_side'] <- 'left'}
            if (dat[row, 'Bline_Num'] == dat[row, 'Mid'])   {dat[row, 'Bline_side'] <- 'mid'}
            if (dat[row, 'Bline_Num'] == dat[row, 'Right']) {dat[row, 'Bline_side'] <- 'right'}
            if (dat[row, 'Extr_Num']  == dat[row, 'Left'])  {dat[row, 'Extr_side']  <- 'left'}
            if (dat[row, 'Extr_Num']  == dat[row, 'Mid'])   {dat[row, 'Extr_side']  <- 'mid'}
            if (dat[row, 'Extr_Num']  == dat[row, 'Right']) {dat[row, 'Extr_side']  <- 'right'}
        }
        # what button press did the subject actually make? LEFT, MIDDLE, RIGHT, NOANSWER?
        dat$RESPONSE <- as.factor(dat$RESPONSE)
        # what number of dots corresponds with the subject's button press?
        for (row in 1 : nrow(dat) ) {
            switch(as.character(dat[row,'RESPONSE']),
                         'LEFT' = {dat[row, 'response_num'] <- dat[row, 'Left']},
                         'MIDDLE' = {dat[row, 'response_num'] <- dat[row, 'Mid']},
                         'RIGHT' = {dat[row, 'response_num'] <- dat[row, 'Right']},
                         'NOANSWER' = {dat[row, 'response_num'] <- NA}
            )
        }
        # what side was the subject's button-press? Left, mid right?
        dat$response_side <- tolower(dat$RESPONSE)
        dat$response_side[dat$response_side == "middle"] <- 'mid'
        dat$response_side <- factor(dat$response_side, exclude="noanswer")
        # what category was the subject's response? Expected, Borderline, Extreme
        dat$response_category <- "nocat"
        for (row in row.names(na.omit(dat))) {
            if (dat[row, 'response_num'] == dat[row, 'Exp_Num']) {dat[row, 'response_category'] <- 'expected'}
            if (dat[row, 'response_num'] == dat[row, 'Bline_Num']) {dat[row, 'response_category'] <- 'borderline'}
            if (dat[row, 'response_num'] == dat[row, 'Extr_Num']) {dat[row, 'response_category'] <- 'extreme'}
        }
        dat$response_category <- factor(dat$response_category, exclude="nocat")
        dat$RESPONSE <- NULL
        return(dat)
    } # end of classifyResponse function

    # declare local variables
    number_of_valid_subjects <- 30 # = 30
    number_of_rows <- 7680 # 7680
    number_of_trials_per_subject <- number_of_rows / number_of_valid_subjects # = 256
    dat <- gatherData(number_of_valid_subjects) # = 30
    dat <- classifyResponse(dat) # classify the response as expected, near, or far

    # SUBJECT
    dat$Subject=factor(paste("s",sprintf("%02d",dat$Subject),sep=""))
    # TRIAL
    dat$Trial = rep(x = 1:number_of_trials_per_subject, times = number_of_valid_subjects)
    # make a centred Trial for modeling
    dat$c_Trl <-dat$Trial - mean(dat$Trial)
    # make a scaled Trial for modelling
    dat$s_Trl <- as.numeric(scale(dat$Trial))
    # ID
    # id is a unique identifier for the 7680 row data
    dat$id <- factor(paste(paste(dat$Subject),
                                                 paste("t", sprintf("%03d", dat$Trial), sep="") , sep=":"))
    # ITEM
    # create a centred numeric item variable for modeling
    dat$c_Itm <- ifelse(dat$Item==1, -.75,
                                            ifelse(dat$Item==2, -.25,
                                                         ifelse(dat$Item==3, .25, .75)))
    # make Item be a factor and assign labels
    dat$Item <- factor(dat$Item, levels=c(1,2,3,4), labels=c("06:15:24", "16:25:34", "26:35:44", "36:45:54"))
    # add ratios for Item
    item_range_ratio = c(6 / 24, 16 / 34, 26 / 44, 36 / 54)
    # 0.2500000 0.4705882 0.5909091 0.6666667
    item_range_ratio_scaled = as.vector(scale(c(6 / 24, 16 / 34, 26 / 44, 36 / 54)))
    # -1.3441995 -0.1316642  0.5297187  0.9461450
    item_mean_ratio = c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54)))
    # 0.5125000 0.6876471 0.7691558 0.8166667
    item_mean_ratio_scaled = as.vector(scale(c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54)))))
    # -1.37582241 -0.06614191  0.54334858 0.89861574
    dat[dat$Item == "06:15:24", "item_range_ratio"] <-  0.2500000
    dat[dat$Item == "16:25:34", "item_range_ratio"] <-  0.4705882
    dat[dat$Item == "26:35:44", "item_range_ratio"] <-  0.5909091
    dat[dat$Item == "36:45:54", "item_range_ratio"] <-  0.6666667
    dat[dat$Item == "06:15:24", "item_range_ratio_scaled"] <-  -1.3441995
    dat[dat$Item == "16:25:34", "item_range_ratio_scaled"] <-  -0.1316642
    dat[dat$Item == "26:35:44", "item_range_ratio_scaled"] <-   0.5297187
    dat[dat$Item == "36:45:54", "item_range_ratio_scaled"] <-   0.9461450
    dat[dat$Item == "06:15:24", "item_mean_ratio"] <-  0.5125000
    dat[dat$Item == "16:25:34", "item_mean_ratio"] <-  0.6876471
    dat[dat$Item == "26:35:44", "item_mean_ratio"] <-  0.7691558
    dat[dat$Item == "36:45:54", "item_mean_ratio"] <-  0.8166667
    dat[dat$Item == "06:15:24", "item_mean_ratio_scaled"] <-   -1.37582241
    dat[dat$Item == "16:25:34", "item_mean_ratio_scaled"] <-   -0.06614191
    dat[dat$Item == "26:35:44", "item_mean_ratio_scaled"] <-    0.54334858
    dat[dat$Item == "36:45:54", "item_mean_ratio_scaled"] <-    0.89861574
    # VAGUENESS
    # Create a factor coding for Vagueness
    dat[ dat$Condition==1 , 'Vagueness'] <- 'Vague'
    dat[ dat$Condition==2 , 'Vagueness'] <- 'Crisp'
    dat[ dat$Condition==3 , 'Vagueness'] <- 'Vague'
    dat[ dat$Condition==4 , 'Vagueness'] <- 'Crisp'
    dat$Vagueness <- as.factor(dat$Vagueness)
    # manually center Vagueness
    dat$c_Vag <- ifelse(dat$Vagueness=='Crisp', -.5, .5)
    # NUMBER
    # Create a factor coding for Number use
    dat[ dat$Condition==1 , 'Number'] <- 'Numeric'
    dat[ dat$Condition==2 , 'Number'] <- 'Numeric'
    dat[ dat$Condition==3 , 'Number'] <- 'Verbal'
    dat[ dat$Condition==4 , 'Number'] <- 'Verbal'
    dat$Number <- as.factor(dat$Number)
    # manually center Number
    dat$c_Num <- ifelse(dat$Number=='Numeric', -.5, .5)
    # CONDITION
    # make a factor out of Condition, as f_Cnd
    dat$f_Cnd <- factor(dat$Condition, levels=c(1,2,3,4), labels=c('Vg:Nm', 'Cr:Nm', 'Vg:Vb', 'Cr:Vb'))
    # ORDER
    # give the levels of Order meaningful names
    dat$Order <- factor(dat$Order, levels=c(1,2), labels=c('LtoR','RtoL'))
    # make a manually centred Order
    dat$c_Ord <- ifelse(dat$Order=="LtoR",-.5,.5)
  # QUANTITY
    # give the levels of Quantity meaningful names
    dat$Quantity <- factor(dat$Quantity, levels=c(1,2), labels=c('Small','Large'))
    # make a manually centred Quantity
    dat$c_Qty <- ifelse(dat$Quantity=="Small",-.5,.5)
    # INSTRUCTION
    # add number of characters in the instruction # 29 30 34 36 38
    dat$nchar_instr = nchar(dat$Instruction)
    dat$nchar_instr_scaled = as.vector(scale(nchar(dat$Instruction), scale=TRUE))
    # make Instruction be a factor (17 levels)
    dat$Instruction <- as.factor(dat$Instruction)
    # RT
    # add transformations of RT
    dat$RT_RecSqd    <- 1/dat$RT^2
    dat$RT_Rec       <- 1/dat$RT
    dat$RT_RecSqt    <- 1/sqrt(dat$RT)
    dat$RT_log       <- log(dat$RT)
    dat$RT_sqt       <- sqrt(dat$RT)
    dat$RT_raw       <- dat$RT
    dat$RT_sqd       <- dat$RT^2
  # print to file a table with information about the design
    design_info <- unique(subset(dat, select=c(Item, Condition, Vagueness, Number, Quantity, Order,
                                                                                         Left, Mid, Right, Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side,Instruction)))
    design.info <- design_info[order(design_info$Item, design_info$Condition, design_info$Quantity, design_info$Order),]
    row.names(design_info) <- NULL
    capture.output(print.data.frame(design_info, row.names=F, print.gap=3, quote=F, right=F),
                                 file="design_info.table")
    # put dat in better column order
    dat <- subset(dat,
                                select = c(id, Subject, Trial, Condition, Order, Quantity, Vagueness, Number,
                                                     Item, item_range_ratio, item_range_ratio_scaled, item_mean_ratio, item_mean_ratio_scaled,
                                                     c_Trl, s_Trl, c_Itm, c_Vag, c_Num, f_Cnd, c_Ord, c_Qty,
                                                     RT, RT_RecSqd, RT_Rec, RT_RecSqt, RT_log, RT_sqt, RT_raw, RT_sqd,
                                                     Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side, response_num, response_side, response_category, Left, Mid, Right,
                                                     Instruction, nchar_instr
                                ))
    # save dat as dat.Rda
    # This data set contains *all* trials 7680 including impossible trials and is
    # mainly for graphs comparing different removals
    save(dat, file="dat.Rda")
    # remove impossible trials from dat in new data dd #
    # Throw out RT = 1 and RT = 59998, and RTprev = 1 and RTprev = 59998
    # i.e., throw out sticky fingers and timeouts,
    # and the trials that followed sticky fingers and timeouts
    # since they were likely affected by unusual previous trials.
    # lose impossible trials
    dd <- dat
    dd$RT[dd$RT == 1 ] <- NA
    dd$RT[dd$RT == 59998 ] <- NA
    dd <- dd[complete.cases(dd),]
    row.names(dd) <- NULL
    # add preceding RT: because we removed impossible trials, the value for preceding RT for a trial following an impossible trial is the value of the trial that preceded the impossible trial.
    dd$RTprev <- NA
    for (s in levels(dd$Subject)) {
        nrows = nrow(dd[dd$Subject==s,])
        for (i in 1:nrows) {
            if (i==1) {
                dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i]
            }
            else
                dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i-1]
        }
    }
    # add transformations of previous RT
    dd$RTprev_RecSqd    <- 1/dd$RTprev^2
    dd$RTprev_Rec       <- 1/dd$RTprev
    dd$RTprev_RecSqt0   <- 1/sqrt(dd$RTprev)
    dd$RTprev_RecSqt    <- as.vector(scale( 1/sqrt(dd$RTprev)  ))
    dd$RTprev_log       <- log(dd$RTprev)
    dd$RTprev_sqt       <- sqrt(dd$RTprev)
    dd$RTprev_raw       <- dd$RTprev
    dd$RTprev_sqd       <- dd$RTprev^2
    # number of higher-level cells in the design (abstracting over Order) =
    # Item(4) * Number(2) * Vagueness(2) * Quantity(2) = 32
    # number of measurements in eaach cell = 8
    # 332 * 8 = 256
    # 256 * 30 = 7680
    dd$measurement=0
    dd$cell=0
    for (s in levels(dd$Subject)) {
        cellcount=0
        for (i in levels(dd$Item)) {
            for (n in levels(dd$Number)) {
                for (v in levels(dd$Vagueness)) {
                    for (q in levels(dd$Quantity)) {
                        measurementcount=0
                        cellcount=cellcount+1
                        for (t in dd[dd$Subject==s & dd$Item==i & dd$Number==n & dd$Vagueness==v & dd$Quantity == q, "Trial"]) {
                            dd[dd$Subject==s & dd$Item==i & dd$Number==n & dd$Vagueness==v & dd$Quantity == q & dd$Trial==t, "cell"] <- cellcount
                            measurementcount=measurementcount+1
                            dd[dd$Subject==s & dd$Item==i & dd$Number==n & dd$Vagueness==v & dd$Quantity == q & dd$Trial==t, "measurement"] = measurementcount
                        }
                    }
                }
            }
        }
    }
    # put dd in better column order
    dd <- subset(dd,
                             select = c(id, Subject, Trial, Condition, Order, Quantity, Vagueness, Number,
                                                 Item, item_range_ratio, item_range_ratio_scaled, item_mean_ratio, item_mean_ratio_scaled,
                                                 c_Trl, s_Trl, c_Itm, c_Vag, c_Num, f_Cnd, c_Ord, c_Qty,
                                                 RT, RT_RecSqd, RT_Rec, RT_RecSqt, RT_log, RT_sqt, RT_raw, RT_sqd,
                                                 RTprev, RTprev_RecSqd, RTprev_Rec, RTprev_RecSqt, RTprev_log, RTprev_sqt, RTprev_raw, RTprev_sqd,
                                                 Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side, response_num, response_side, response_category, Left, Mid, Right,
                                                 Instruction, nchar_instr,
                                                 cell, measurement
                             ))
    save(dd, file="dd.Rda")
} # end of the else clause, which is run if both of the files dat.Rda and dd.Rda do not exist.

Compare distributions of the various transformations of RT against random samples from normal distributions with the same mean and sd to see which transformations best approximate normal distributions.

## plot the comparison of the transformations against their normal equivalents to see distributional properties using full data
# first create a data frame specially for transformations
# this won't be used for analysis, just for this plot
dat_transforms <- dat
# dfSubset says whether any data points were removed
dat_transforms$dfSubset <- "original"
# RTtype says whether the row contains data for an observed RT or 
# a sample from a normal distribution with the same mean and sd
dat_transforms$RTtype <- "observed"
# create extra rows for dfs with removed (outlier) data points
dat1 <- dat_transforms                                       # nrow 7680
dat2 <- subset(dat_transforms, RT>1)                         # nrow 7678
dat2$dfSubset <- "RT>1"
dat3 <- subset(dat_transforms, RT>1 & RT<59999)              # nrow 7677
dat3$dfSubset <- "RT>1&RT<59999"
dat4 <- subset(dat_transforms, RT>1 & RT<59999 & RT<30001)   # nrow 7658
dat4$dfSubset <- "RT>1&RT<30001"
# bring the dfs back into main df
dat_transforms <- rbind(dat1, dat2, dat3, dat4)
dat_transforms$dfSubset <- factor(dat_transforms$dfSubset, levels=c("original", "RT>1", "RT>1&RT<59999", "RT>1&RT<30001"))
# create extra rows for normally distributed transformed variables
dato = dat_transforms
datn = dat_transforms; datn$RTtype <- "normal"
dat_transforms <- rbind(dato,datn)
dat_transforms$RTtype <- factor(dat_transforms$RTtype, levels=c("observed","normal"))
# for safety
dat_transforms[dat_transforms$RTtype=="normal", c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd")] <- NA
# Create the normally distributed transformed variables, replacing the NAs declared above
for (k in levels(dat_transforms$dfSubset)){
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqd"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_RecSqd"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqd"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_RecSqd"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_Rec"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_Rec"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_Rec"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_Rec"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqt"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_RecSqt"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqt"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_RecSqt"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_log"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_log"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_log"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_log"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqt"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_sqt"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqt"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_sqt"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_raw"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_raw"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_raw"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_raw"]))
  dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqd"]  <- 
    rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k  & dat_transforms$RTtype=="observed", "RT_sqd"]), 
          mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqd"]), 
          sd=sd(dat_transforms[dat_transforms$dfSubset==k     & dat_transforms$RTtype=="observed", "RT_sqd"]))
} # end of create normally distributed equivalents for the data transforms.
temp <- reshape2::melt(dat_transforms, 
             id.vars=c("dfSubset", "RTtype", "Item", "Vagueness", "Number", "Quantity", "Order"),
             measure.vars=c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd"),
             variable.name="transformation",
             value.name="score")
ggplot(temp, aes(score, colour=RTtype)) + 
  geom_freqpoly() + 
  facet_wrap(dfSubset~transformation, scales="free", nrow=4) +
  theme_bw() + 
    theme(aspect.ratio=1) + 
    scale_color_manual(values=c("blue","red"))

Now working with a data set without impossible trials.

Show how transformations of RT affect distribution of times, and how they affect which times are outliers.

# allow reference to transformation
subdata = reshape2::melt(dd,
               measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log", "RT_raw"),
               variable.name="transformation",
               value.name="value") 
# boxplots for subjects and items separately for each dependent variable
ggplot(subdata) + 
  facet_wrap(~ transformation, scales="free", ncol=4) +
  geom_boxplot(aes(y=value, x=Item, col="Items")) + 
  geom_boxplot(aes(y=value, x=Subject, col="Subjects")) + 
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1), aspect.ratio=1, panel.grid=element_blank()) + 
  scale_colour_manual("",values=c("Items"="red","Subjects"="blue")) +
  xlab("") + ylab("")

Show how mean times for individual subjects and items vary with respect to the grand mean RT.

# Show how mean times for individual subjects and items vary with respect to the grand mean RT.
subs=ddply(dd, .(level=Subject), 
                     summarise, effect="subject", emean=mean(dd$RT), lmean=mean(RT),
                     ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0, "slow", "fast"))
itms=ddply(dd, .(level=Item), 
                     summarise, effect="item", emean=mean(dd$RT), lmean=mean(RT), 
                     ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0, "slow", "fast"))
si=rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))
my1 <- xyplot(data=si, lmean~level | effect, 
              aspect=1,
              layout=c(2,1),
              ylim = extendrange(si$lmean, f = 0.10),
              par.settings=list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19), 
                                                                                superpose.line=list(col = c("blue","red")))),
              groups=si$lsign,
              xlab=NULL,
              ylab="RT (in ms)",
              key=list(text=list(levels(si$lsign)), 
                             lines=list(lty=c(1,1), col=trellis.par.get('superpose.line')$col, 
                                                 pch=c(19,19), type='o'), divide=1),
              mean=unique(si$emean),
              scales=list(x=list(relation="free", at=list(15.5, 32.5), 
                                                 labels=list("subject number", "item identifier"), cex=1 ),
                          y=list(at=c(0, 2000, unique(si$emean), 4000, 6000, 8000,  10000), 
                                     labels=c(0, 2000,  "Mean", 4000,  6000, 8000, 10000), cex=.9) ),
              panel=function(x,y,type,subscripts,groups=groups,...){
                panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
                panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y, groups=groups, subscripts=subscripts, 
                                         code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
                panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-300,y+300)), 
                                     as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-300,y+300)), 
                                     as.character(si$level)[subscripts][panel.number()==2], cex=.75)
              }) # end of my1
my2 <- xyplot(data=si, ldiff~level | effect, 
              aspect=1, type='n',
              ylab="Deviation from mean RT (in ms)",
              ylim=extendrange(si$ldiff,f=0.10)
                            ) # creates a second y-axis scale
print(doubleYScale(my1, my2, 
                                     add.axis=TRUE, 
                                     add.ylab2=TRUE, 
                                     use.style=FALSE))

Show how mean times for individual subjects and items vary with respect to the grand mean RT_RecSqt.

# Show how mean times for individual subjects and items vary with respect to the grand mean RT_RecSqt.
subs <- ddply(dd, .(level=Subject), summarise, 
              effect="subject", emean=mean(dd$RT_RecSqt), lmean=mean(RT_RecSqt), 
                            ldiff=mean(RT_RecSqt)-mean(dd$RT_RecSqt),
              lsign=ifelse(mean(RT_RecSqt)-mean(dd$RT_RecSqt)<0,"slow","fast"))
itms <- ddply(dd, .(level=Item), summarise, effect="item", emean=mean(dd$RT_RecSqt), lmean=mean(RT_RecSqt),
              ldiff=mean(RT_RecSqt)-mean(dd$RT_RecSqt), 
                            lsign=ifelse(mean(RT_RecSqt)-mean(dd$RT_RecSqt)<0,"slow","fast"))
si <- rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))
my1 <- xyplot(data=si, lmean~level | effect, 
              aspect=1,
              layout=c(2,1),
              ylim = extendrange(si$lmean, f = 0.10),
              par.settings = list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19), superpose.line=list(col = c("blue","red")))),
              groups = si$lsign,
              xlab = NULL,
              ylab = "RT_RecSqt: reciprocal of square root of RT in ms\n",
              key=list(text=list(levels(si$lsign)), 
                             lines=list(lty=c(1,1), 
                                                 col=trellis.par.get('superpose.line')$col,
                                                 pch=c(19,19), type='o'),
                             divide=1),
              mean = unique(si$emean),
              scales = list(x=list(relation="free", at=list(15.5, 32.5), 
                                                     labels=list("subject number", "item identifier"), cex=1 ) ),
              panel=function(x,y,type,subscripts,groups=groups,...){
                panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
                panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y, 
                                         groups=groups, subscripts=subscripts, code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
                panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-.001,y+.001)), 
                                     as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
                panel.text(x, y=(ifelse(y<unique(si$emean),y-.001,y+.001)), 
                                     as.character(si$level)[subscripts][panel.number()==2], cex=.75)
              })
my2 <- xyplot(data=si, ldiff~level | effect, 
              aspect=1, type='n',
              ylab="\nDeviation from mean RT_RecSqt (in ms)",
              ylim=extendrange(si$ldiff,f=0.10))
print(doubleYScale(my1, my2, add.axis=TRUE, add.ylab2=TRUE, use.style=FALSE))

Show how practice and fatigue effects vary over subjects

# Show how practice/fatigue effects vary over subjects
ggplot(dd, aes(y=RT_RecSqt, x=Trial)) + 
    facet_wrap(~Subject,nrow=3) + 
    theme(panel.grid=element_blank(), 
                panel.margin = unit(0, "lines"), 
                panel.background = element_rect(fill = 'white', colour='black' ), 
                legend.position="top") + 
    scale_x_continuous("Trial", breaks=NULL) + 
    geom_point(pch=19, cex=.25, alpha=.5) + 
    stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + 
    theme(aspect.ratio = 1) 

Show practice and fatigue effects vary over items.

# Show how practice/fatigue effects vary over items
ggplot(dd, aes(y=RT_RecSqt, x=Trial)) + 
    facet_grid(~Item) + 
    theme(panel.grid=element_blank(), panel.margin = unit(0, "lines"), 
                panel.background = element_rect(fill = 'white', colour='black' ), 
                legend.position="top") + 
    scale_x_continuous("Trial") + 
    geom_point(pch=19, cex=.25, alpha=.5) + 
    stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + 
    theme(aspect.ratio = 1) 

Plot main effects and interactions

Main effects

Plot main effects on several transformations

# plot main effects on observed data
# allow reference to transformation
temp <- reshape2::melt(dd,
             id.vars=c("Item", "Vagueness", "Number", "Quantity", "Order"),
             measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log",  "RT_raw"),
             variable.name="transformation",
             value.name="score")
# allow reference to effect
temp2 <- reshape2::melt(temp, 
              id.vars=c("transformation", "score"),
              measure.vars=c("Item", "Vagueness", "Number", "Quantity", "Order"),
              variable.name="effect",
              value.name="level")
# do the plot
ggplot(temp2, aes(y=score, x=level, group=1, col=effect)) +
  facet_wrap(~transformation + effect, scales="free", shrink=TRUE, ncol=5) +
  stat_summary(fun.data=mean_cl_normal, geom="pointrange") +
  stat_summary(fun.y=mean, geom="line") +
  theme(aspect.ratio=1, axis.text.y=element_blank())

Main effects

# prep main effects rts data frame
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness")));names(vrts)[3]<-"Levels"
nrts <- cbind(Effect="Number",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number")));names(nrts)[3]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Quantity")));names(qrts)[3]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Order")));names(orts)[3]<-"Levels"
irts <- cbind(Effect="Item",x=factor(c(1,2,3,4)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Item")));names(irts)[3]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
# plot main effects
ggplot(rts, aes(y=RT_RecSqt, x=Levels, group=1, ymin=mins, ymax=maxs)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Effect, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) 

Main effects over items

vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Vagueness")));names(vrts)[4]<-"Levels"
nrts <- cbind(Effect="Number",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Number")));names(nrts)[4]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Quantity")));names(qrts)[4]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Order")));names(orts)[4]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
ggplot(rts, 
             aes(y=RT_RecSqt, x=item_mean_ratio, group=Levels, ymin=mins, 
                    ymax=maxs, shape=Levels, fill=Levels, col=Levels)) +
  scale_shape_manual(values=c(21,21,22,22,23,23,24,24)) +
  scale_fill_manual(values=c("black", "gray", "black", "gray", "black", "gray", "black", "gray")) +
  scale_color_manual(values=c("black", "gray", "black", "gray", "black", "gray", "black", "gray")) +
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Effect) + 
  theme_bw() +
  theme(aspect.ratio=1, 
        panel.grid=element_blank()) +
  scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1)) 

2 way interactions not involving items.

# prep 2 ways rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness","Number")); names(rts1)[1] <- "F1"; names(rts1)[2] <- "F2"
rts1$Effect <- rep("Vagueness:Number",4)
rts2 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness","Quantity")); names(rts2)[1] <- "F1"; names(rts2)[2] <- "F2"
rts2$Effect <- rep("Vagueness:Quantity",4)
rts3 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness","Order")); names(rts3)[1] <- "F1"; names(rts3)[2] <- "F2"
rts3$Effect <- rep("Vagueness:Order",4)
rts5 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Quantity")); names(rts5)[1] <- "F1"; names(rts5)[2] <- "F2"
rts5$Effect <- rep("Number:Quantity",4)
rts6 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Order")); names(rts6)[1] <- "F1"; names(rts6)[2] <- "F2"
rts6$Effect <- rep("Number:Order",4)
rts8 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Quantity","Order")); names(rts8)[1] <- "F1"; names(rts8)[2] <- "F2"
rts8$Effect <- rep("Quantity:Order",4)
rts <- rbind(rts1,rts2,rts3,rts5,rts6,rts8)
rts <- subset(rts, select=c(Effect, F1, F2, RT_RecSqt, RT_RecSqt_norm, sd, se, ci))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
# plot 2 way interactions
ggplot(rts, aes(y=RT_RecSqt, x=F2, group=F1, ymin=mins, ymax=maxs, pch=F1)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_wrap(~Effect, scale="free_x", nrow=1) + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank()) +
  xlab("Levels")

Plot 2 way interactions over items

# prep 2 ways over items rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Number")); names(rts1)[2] <- "F1"; names(rts1)[3] <- "F2"
rts1$E1 <- "Vagueness"
rts1$E2 <- "Number"
rts2 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Quantity")); names(rts2)[2] <- "F1"; names(rts2)[3] <- "F2"
rts2$E1 <- "Vagueness"
rts2$E2 <- "Quantity"
rts3 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Order")); names(rts3)[2] <- "F1"; names(rts3)[3] <- "F2"
rts3$E1 <- "Vagueness"
rts3$E2 <- "Order"
rts4 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Quantity", "Number")); names(rts4)[2] <- "F1"; names(rts4)[3] <- "F2"
rts4$E1 <- "Quantity"
rts4$E2 <- "Number"
rts5 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Order","Number")); names(rts5)[2] <- "F1"; names(rts5)[3] <- "F2"
rts5$E1 <- "Order"
rts5$E2 <- "Number"
rts6 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Quantity","Order")); names(rts6)[2] <- "F1"; names(rts6)[3] <- "F2"
rts6$E1 <- "Quantity"
rts6$E2 <- "Order"
rts <- rbind(rts1,rts2,rts3,rts4,rts5,rts6)
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
rts$F3 <- factor(paste(rts$F1,rts$F2))
rts$E1 <- factor(rts$E1, levels=c("Number","Order","Quantity","Vagueness"))
rts$E2 <- factor(rts$E2, levels=c("Number","Order","Quantity","Vagueness"))
# The palette with black:
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ggplot(rts, aes(y=RT_RecSqt, x=item_mean_ratio, group=F3, col=F2, pch=F1)) + 
  facet_grid(E2~E1, drop=F) + 
  geom_point(cex=3) + 
    geom_line() + 
    theme_bw() + 
  theme(aspect.ratio=1, panel.grid=element_blank()) +
  scale_color_manual(name="colour",values=cbbPalette) +
  scale_shape_discrete(name="shape") +
  scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))

Plot vagueness by number interaction over items

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Item"))
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
dodge = position_dodge(width=0.2)
ggplot(rts, aes(y=RT_RecSqt, x=Item, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, 
                group=condition, shape=condition, fill=condition)) +
  geom_line(position=dodge) + 
  geom_errorbar(width=.2, position=dodge) + 
  geom_point(size=4, position=dodge) + 
  scale_shape_manual("",values = c(22, 22, 21, 21)) + 
  scale_fill_manual("",values=c("black","white","black","white")) +
  ggtitle("Response time") + 
  ylab("RT_RecSqt") + 
  xlab("") +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
            legend.key = element_blank(), aspect.ratio=1, 
            axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(~Number)

3 way interactions

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Quantity"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p1=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Vagueness, ymin=mins, ymax=maxs, pch=Vagueness)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Quantity, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank(), legend.position="top")
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p2=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Vagueness, ymin=mins, ymax=maxs, pch=Vagueness)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Order, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank(), legend.position="top")
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Quantity","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p3=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Quantity, ymin=mins, ymax=maxs, pch=Quantity)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Order, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank(), legend.position="top")
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness","Quantity","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p4=ggplot(rts, aes(y=RT_RecSqt, x=Vagueness, group=Quantity, ymin=mins, ymax=maxs, pch=Quantity)) + 
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  facet_grid(~Order, scale="free_x") + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank(), legend.position="top")
grid.arrange(nrow=2, p1,p2,p3,p4)

Plot vagueness by number by quantity over items.

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Item", "Quantity"))
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
dodge = position_dodge(width=0.2)
ggplot(rts, aes(y=RT_RecSqt, x=Item, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, 
                group=Vagueness, 
                fill=Vagueness)) +
  geom_line(position=dodge) + 
  geom_errorbar(width=.2, position=dodge) + 
  geom_point(pch=21, size=4, position=dodge) + 
  scale_fill_manual("",values=c("black","white")) +
  ggtitle("Response time") + 
  ylab("RT_Rec_Sqt") + 
  xlab("") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background=element_rect(fill="white",color=1),
        legend.key = element_blank(), 
        aspect.ratio=1, axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(~Number+Quantity)

4 way interaction

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness",  "Quantity", "Order"))
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
ggplot(rts, aes(y=RT_RecSqt, x=Vagueness, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, group=Quantity, pch=Quantity)) +
  facet_grid(~Order+Number, scale="free_x") +
  geom_line() +
  geom_errorbar(width=.1) +
  geom_point(cex=3) + 
  theme_bw() +
  theme(aspect.ratio=1) +
  theme(legend.title=element_blank())

4 way interaction split over items

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness", "Item", "Quantity", "Order"))
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
ggplot(rts, aes(y=RT_RecSqt, x=Item, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, 
                group=Vagueness,fill=Vagueness)) +
  geom_line() + 
  geom_errorbar(width=.12) + 
  geom_point(pch=21, size=2) + 
  scale_fill_manual("",values=c("black","white","black","white")) +
  ggtitle("Number * Vagueness * Quantity * Order") + 
  ylab("RT_RecSqt") + 
  xlab("") +
  theme_bw() +
  theme( legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1),
             axis.text.x = element_text(angle = 45, hjust = 1), aspect.ratio=1) +
  facet_grid(Vagueness+Order~Number+Quantity)

4 way interaction split over items showing ratio of item dots.

rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness", "item_mean_ratio", "Quantity", "Order"))
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
#dodge = position_dodge(width=0.2)
ggplot(rts, aes(y=RT_RecSqt, x=item_mean_ratio, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, 
                group=Vagueness,fill=Vagueness)) +
  # geom_line(position=dodge) + 
    geom_line() +
  geom_errorbar(width=.02) + 
  geom_point(pch=21, size=2) + 
  scale_fill_manual("",values=c("black","white","black","white")) +
  ggtitle("Number * Vagueness * Quantity * Order") + 
  ylab("RT_RecSqt") + 
  xlab("") +
  theme_bw() +
  theme( legend.key = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1), 
             axis.text.x = element_text(angle = 45, hjust = 1), aspect.ratio=1) +
  facet_grid(Vagueness+Order~Number+Quantity)

Show compression of item ratios

ols model v4, RecSqt, in progress

Pairs plot

pairs(dd[,c("RT_RecSqt", "Number", "Vagueness", "RTprev_RecSqt")], pch = 19, cex=.075)

Build the model

## do ols model
# define datadist
dd.dd = datadist(dd)
options(datadist = "dd.dd")
# build model
v4 <- ols(data=dd, 
          RT_RecSqt ~ 
            c_Vag + c_Num + c_Qty + c_Ord + 
            c_Num:c_Vag:c_Qty+
            item_mean_ratio + 
            #c_Vag:c_Num + 
            #c_Vag:c_Num:item_mean_ratio + 
            # pol(s_Trl, 3) +
            s_Trl+
            # pol(RTprev_RecSqt,3) +
            RTprev_RecSqt +
            nchar_instr
            # cell+
            # measurement,
            ,
          x=T, y=T )
# print out p value for likelihood ratio with df
cat("lik.ratio p value, whether the model as a whole is explanatory: p =", 1-pchisq(v4$stats[2],v4$stats[3]))
## lik.ratio p value, whether the model as a whole is explanatory: p = 0
# print out R2 for the model
cat("R^2 = ", v4$stats[4])
## R^2 =  0.2898629

Show the model tables

# show model
v4
## 
## Linear Regression Model
## 
## ols(formula = RT_RecSqt ~ c_Vag + c_Num + c_Qty + c_Ord + c_Num:c_Vag:c_Qty + 
##     item_mean_ratio + s_Trl + RTprev_RecSqt + nchar_instr, data = dd, 
##     x = T, y = T)
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
## Obs     7677    LR chi2   2627.82    R2       0.290    
## sigma 0.0062    d.f.            9    R2 adj   0.289    
## d.f.    7667    Pr(> chi2) 0.0000    g        0.004    
## 
## Residuals
## 
##        Min         1Q     Median         3Q        Max 
## -0.0209659 -0.0040909  0.0000796  0.0042312  0.0196345 
## 
##                       Coef    S.E.   t      Pr(>|t|)
## Intercept              0.0314 0.0009  35.36 <0.0001 
## c_Vag                 -0.0007 0.0001  -4.57 <0.0001 
## c_Num                  0.0043 0.0001  29.65 <0.0001 
## c_Qty                  0.0009 0.0001   6.26 <0.0001 
## c_Ord                 -0.0004 0.0001  -2.57 0.0103  
## item_mean_ratio       -0.0078 0.0006 -12.75 <0.0001 
## s_Trl                  0.0007 0.0001  10.12 <0.0001 
## RTprev_RecSqt          0.0030 0.0001  42.11 <0.0001 
## nchar_instr           -0.0001 0.0000  -2.79 0.0053  
## c_Vag * c_Num * c_Qty -0.0011 0.0006  -1.93 0.0537
# show model summary
summary(v4)
##              Effects              Response : RT_RecSqt 
## 
##  Factor          Low      High     Diff.    Effect      S.E.      
##  c_Vag           -0.50000  0.50000 1.000000 -0.00094483 2.0041e-04
##  c_Num           -0.50000  0.50000 1.000000  0.00400970 2.0586e-04
##  c_Qty           -0.50000  0.50000 1.000000  0.00060798 1.9988e-04
##  c_Ord           -0.50000  0.50000 1.000000 -0.00036185 1.4096e-04
##  item_mean_ratio  0.68765  0.76916 0.081509 -0.00063266 4.9612e-05
##  s_Trl           -0.85921  0.87274 1.732000  0.00125240 1.2380e-04
##  RTprev_RecSqt   -0.61685  0.63201 1.248900  0.00375950 8.9289e-05
##  nchar_instr     30.00000 36.00000 6.000000 -0.00040202 1.4411e-04
##  Lower 0.95  Upper 0.95 
##  -0.00133770 -5.5197e-04
##   0.00360610  4.4132e-03
##   0.00021616  9.9980e-04
##  -0.00063817 -8.5528e-05
##  -0.00072991 -5.3541e-04
##   0.00100980  1.4951e-03
##   0.00358450  3.9346e-03
##  -0.00068451 -1.1952e-04
## 
## Adjusted to: c_Vag=-0.5 c_Num=-0.5 c_Qty=-0.5
# do anova 
an1 = anova(v4)
## Warning in anova.rms(v4): tests of nonlinear interaction with respect to single component 
## variables ignore 3-way interactions
# show anova
an1
##                 Analysis of Variance          Response: RT_RecSqt 
## 
##  Factor                                               d.f. Partial SS  
##  c_Vag  (Factor+Higher Order Factors)                    2 0.0009653558
##   All Interactions                                       1 0.0001419055
##  c_Num  (Factor+Higher Order Factors)                    2 0.0338247101
##   All Interactions                                       1 0.0001419055
##  c_Qty  (Factor+Higher Order Factors)                    2 0.0016309290
##   All Interactions                                       1 0.0001419055
##  c_Ord                                                   1 0.0002512303
##  item_mean_ratio                                         1 0.0061997569
##  s_Trl                                                   1 0.0039022370
##  RTprev_RecSqt                                           1 0.0675901413
##  nchar_instr                                             1 0.0002967003
##  c_Vag * c_Num * c_Qty  (Factor+Higher Order Factors)    1 0.0001419055
##  REGRESSION                                              9 0.1193124785
##  ERROR                                                7667 0.2923044952
##  MS           F       P     
##  4.826779e-04   12.66 <.0001
##  1.419055e-04    3.72 0.0537
##  1.691236e-02  443.60 <.0001
##  1.419055e-04    3.72 0.0537
##  8.154645e-04   21.39 <.0001
##  1.419055e-04    3.72 0.0537
##  2.512303e-04    6.59 0.0103
##  6.199757e-03  162.62 <.0001
##  3.902237e-03  102.35 <.0001
##  6.759014e-02 1772.86 <.0001
##  2.967003e-04    7.78 0.0053
##  1.419055e-04    3.72 0.0537
##  1.325694e-02  347.72 <.0001
##  3.812502e-05

plot ‘partial effects’

## plot partial effects
# Compute Predicted Values and Confidence Limits
p1=Predict(v4)
# plot 'partial effects'
plot(p1, anova=an1, pval=T, aspect=1, main="ols model v4")

# add predicted RT_RecSqt to dd
dd$RT_Predicted <- predict (v4)

Plot model coefficients and ci’s

par(las=2, mar=c(14,6,1,1))
y=coef(v4) # with intercept
n=length(y)
y0=confint(v4)[,1]
y1=confint(v4)[,2]
y=coef(v4)[-1] # omit intercept
n=length(y)
y0=confint(v4)[-1,1]
y1=confint(v4)[-1,2]
plot(y, xaxt="n", xlab="", ylab="RT_RecSqt\n", pch=19, ylim=extendrange(y, f=.10), main="Speed")
abline(h=0)
axis(1, labels=names(y) , at=1:n)
grid()
segments(x0=1:n, x1=1:n, y0, y1, lwd=2)

Collinearity is assessed by the condition number \(k\).
The greater the collinearity, the closer the matrix of predictors is is to becoming singular.
\(k\) estimates the extent to which a matrix is \(\text{singular}\)
\(0\ldots6\) no collinearity
\(\text{around} 15\) medium collinearity
\(>30\) indicates potentially harmful collinearity

cat("k is:",
collin.fnc(dd[, c("s_Trl", "c_Num", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", 
                                    "RTprev_RecSqt", "nchar_instr", "measurement")
                            ])$cnumber
)
## k is: 38.03642
par(mar=c(1.1,3.1,1.1,1.1), pty='s')
plot(varclus(
    as.matrix(dd[,c("s_Trl", "c_Num", "c_Vag", "c_Qty", "c_Ord",
                                    "item_mean_ratio", "RTprev_RecSqt", "nchar_instr", "measurement")])))

Model criticism baayen plots

# Baayen 4-plot model criticism
par(mfrow=c(1,4), pty='s')
# create scaled residuals
v4$rstand = as.vector(scale(resid(v4)))
# plot scaled residuals density
plot(density(v4$rstand), main='density of\nscaled residuals')
# plot sample quantiles versus theoretical quantiles
qqnorm(v4$rstand, cex=.5)
qqline(v4$rstand)
# plot standardised residuals versus fitted values
plot(v4$rstand ~ fitted(v4), pch='.', main='std resid\nv fitted')
# absolute standardised residuals greater than 2.5 are candidates 
# for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))
# create dffits
dffits=abs(resid(v4, "dffits"))
# plot dffits
plot(dffits, type='h', main='dffits')

# here no overly influential
w = which.influence(fit = v4, cutoff = 0.4)
if (length(w)==0) {
    print('There are no overly influential data points')
} else {
    print('There are some overly influential variables:');
    print(w)
}
## [1] "There are no overly influential data points"

Model validation

Validation tests overfitting using bootstrap.

Bootstrapping draws as a bootstrap sample the same number of samples as the original data, at random with replacement, from the original data.

Then fit the model to the data in the bootstrap sample, and use this model to predict the reaction times for the original full data (which contains many samples that are new to the bootstrap model).

Then compare the goodness of fit of the bootstrap model with the goodness of fit of the original model.

Averaged over a large number of bootstrap models these comparisons of goodness of fit reveal to what extent the original model overfits the data.

Function validate() in rms does this re-sampling validation.

# argument B is the number of bootstrap runs
# argument pr is whether to print the results of each run
v_1 <- validate(v4, bw = T, B = 200, pr=FALSE, estimates=TRUE)
## 
##      Backwards Step-down - Original Model
## 
## No Factors Deleted
## 
## Factors in Final Model
## 
## [1] c_Vag                 c_Num                 c_Qty                
## [4] c_Ord                 item_mean_ratio       s_Trl                
## [7] RTprev_RecSqt         nchar_instr           c_Vag * c_Num * c_Qty
# in print.validate, B is the number of re-samples to show outocmes for each resample
print(v_1, B=0)
##           index.orig training   test optimism index.corrected   n
## R-square      0.2899   0.2907 0.2887   0.0020          0.2878 200
## MSE           0.0000   0.0000 0.0000   0.0000          0.0000 200
## g             0.0045   0.0045 0.0045   0.0000          0.0044 200
## Intercept     0.0000   0.0000 0.0001  -0.0001          0.0001 200
## Slope         1.0000   1.0000 0.9956   0.0044          0.9956 200
# show how often each number of variables kept was observed across the 200 runs
xtabs(~rowSums(attr(v_1,"kept")))
## rowSums(attr(v_1, "kept"))
##  6  7  8  9 
##  2 31 71 96

model lmer

load("dd.Rda")
v5 <- lmerTest::lmer(data=dd, 
           RT_RecSqt ~ 
             c_Vag + c_Num + c_Qty + c_Ord + 
             c_Num:c_Vag:c_Qty +
             item_mean_ratio + 
             s_Trl +
             RTprev_RecSqt +
             nchar_instr +
             (1+c_Vag + c_Num + c_Qty + c_Ord|Subject)
)
summary(v5)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
##   to degrees of freedom [lmerMod]
## Formula: RT_RecSqt ~ c_Vag + c_Num + c_Qty + c_Ord + c_Num:c_Vag:c_Qty +  
##     item_mean_ratio + s_Trl + RTprev_RecSqt + nchar_instr + (1 +  
##     c_Vag + c_Num + c_Qty + c_Ord | Subject)
##    Data: dd
## 
## REML criterion at convergence: -59041.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8627 -0.6682  0.0131  0.6682  3.7743 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.  Corr                   
##  Subject  (Intercept) 1.647e-05 0.0040589                        
##           c_Vag       1.034e-07 0.0003215  0.26                  
##           c_Num       9.648e-06 0.0031061 -0.41 -0.29            
##           c_Qty       1.089e-06 0.0010436  0.19 -0.10 -0.27      
##           c_Ord       2.145e-07 0.0004631 -0.15  0.31 -0.46 -0.68
##  Residual             2.528e-05 0.0050278                        
## Number of obs: 7677, groups:  Subject, 30
## 
## Fixed effects:
##                     Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)        3.132e-02  1.035e-03  1.080e+02  30.268  < 2e-16 ***
## c_Vag             -6.495e-04  1.331e-04  3.300e+01  -4.881 2.57e-05 ***
## c_Num              4.155e-03  5.792e-04  2.900e+01   7.174 6.61e-08 ***
## c_Qty              8.523e-04  2.225e-04  2.900e+01   3.831 0.000627 ***
## c_Ord             -2.833e-04  1.426e-04  4.400e+01  -1.987 0.053149 .  
## item_mean_ratio   -7.953e-03  4.957e-04  7.558e+03 -16.045  < 2e-16 ***
## s_Trl              1.141e-03  5.867e-05  7.564e+03  19.443  < 2e-16 ***
## RTprev_RecSqt      5.765e-04  7.301e-05  7.618e+03   7.897 3.11e-15 ***
## nchar_instr       -6.187e-05  1.956e-05  7.558e+03  -3.163 0.001566 ** 
## c_Vag:c_Num:c_Qty -1.146e-03  4.635e-04  7.558e+03  -2.472 0.013455 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) c_Vag  c_Num  c_Qty  c_Ord  itm_m_ s_Trl  RTp_RS nchr_n
## c_Vag       -0.068                                                        
## c_Num       -0.260 -0.136                                                 
## c_Qty        0.106 -0.035 -0.225                                          
## c_Ord       -0.064  0.082 -0.264 -0.348                                   
## item_men_rt -0.323 -0.004  0.001  0.000  0.000                            
## s_Trl        0.005  0.003 -0.001 -0.002  0.006 -0.017                     
## RTprv_RcSqt  0.002 -0.006  0.007  0.004 -0.016  0.011 -0.206              
## nchar_instr -0.610  0.248 -0.044  0.017  0.000 -0.017  0.001 -0.008       
## c_Vg:c_N:_Q  0.084 -0.034  0.006 -0.002  0.000  0.002 -0.001  0.004 -0.138
cat("R^2")
## R^2
cor(fitted(v5), dd$RT_RecSqt)^2
## [1] 0.534303
par(mfrow=c(2,5))
plotLMER.fnc(v5)
## effect size (range) for  c_Vag is  0.0003630759 
## effect size (range) for  c_Num is  0.003868485 
## effect size (range) for  c_Qty is  0.001138683 
## effect size (range) for  c_Ord is  0.0002832954 
## effect size (range) for  item_mean_ratio is  0.002418943 
## effect size (range) for  s_Trl is  0.003936032 
## effect size (range) for  RTprev_RecSqt is  0.003352036 
## effect size (range) for  nchar_instr is  0.0005568158

Plot model coefficients and ci’s

e=data.frame(coef=summary(v5)$coef[-1,1]) # estimates, without intercept
q=as.data.frame(confint(v5, method='Wald')[18:26,1:2])
eq=cbind(e,q)
y=eq[rev(rownames(eq)),]
par(mar=c(4,10,1,1))
plot(y=1:nrow(y), x=y$coef, xlim=range(y$"2.5 %", y$"97.5 %"), type='n', axes=F, xlab="", ylab="")
segments(y0=1:nrow(y), y1=1:nrow(y), x0=y$"2.5 %", x1=y$"97.5 %", lwd=.55)
points(y=1:nrow(y), x=y$coef, pch=20, cex=.75)
abline(v=0, lty=3, col='grey')
axis(2, labels=row.names(y), at=1:nrow(y), las=1)
axis(1, las=1)
mtext("estimate", side=1, line=2.5, cex.lab=1, las=1)
box(col='grey')

Model criticism baayen plots

# Baayen 4-plot model criticism
par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd$rstand = as.vector(scale(resid(v5)))
# plot scaled residuals density
plot(density(dd$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd$rstand, cex=.5)
qqline(dd$rstand)
# plot standardised residuals versus fitted values
plot(dd$rstand ~ fitted(v5), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))